********************************************************************************
********************************************************************************
//Following Political Science Students Through Their Methods Training
//Statistics Anxiety, Student Satisfaction, and Final Grades in the Corona Year 2021–22

//Replication Dofile
********************************************************************************
********************************************************************************

set more off
version 17.0
clear all

//Global setup for figures
set scheme s1mono
graph set window fontface "Times New Roman"

//Installing packages
//User-written commands and schemes from SSC (requires an internet connection)
ssc install fre , replace
ssc install rev, replace
ssc install resize, replace
ssc install estout, replace
ssc install coefplot, replace
ssc install asdoc, replace
ssc install regcheck, replace
ssc install blindschemes, replace //provides the plotplain scheme requested for Figure 1


//Set paths
********************************************************************************	
//Setting the path to the working directory
//(replace the placeholder with the folder that contains the replication data)
cd "YOUR WORKING DIRECTORY"

//Create directory for output (if not already created)
//"cap" suppresses the error that mkdir raises when the folder already exists
cap mkdir "out"

//Loading data
use "pulse_wide_replication_data.dta", clear


********************************************************************************
//Data preparation 
********************************************************************************

//Homework in all courses: row mean of the homework items from waves 2-4
egen homework_all = rowmean(homework_w2 homework_w3 homework_w4)
label variable homework_all "Homework in all courses"

//Frequency tables: male, age, lecture, master, university qualification parents
foreach v of varlist male age lect master univqual_parents {
	fre `v'
}

//Grade Abitur: inspect, then reverse with 5 - x so that higher values are better
fre gradeabitur
generate gradeabitur_rev = 5 - gradeabitur
label variable gradeabitur_rev "Grade Abitur (1-4, 4 = best grade)"

//Frequency tables: immigrant origin (student, parents), education mother/father
foreach v of varlist migr_self migr_parents edu_m edu_f {
	fre `v'
}

//Frequency of attendance: row total of the attendance items from waves 2-4
//NOTE(review): rowtotal() treats missing values as 0, so this variable is never
//missing - confirm that this is intended for students without attendance data
egen attendance = rowtotal(attendance_w2 attendance_w3 attendance_w4)
label define attendance 0 "No attendance at any meetings" 3 "Attendance at all meetings"
label values attendance attendance
label variable attendance "Attendance in meetings"

//Statistics anxiety index
//alpha reports the scale reliability and, via gen(), stores the scale score;
//pca is run on the same items as a dimensionality check
alpha statfear_1 statfear_2 statfear_3 statfear_4, item gen(statfear_index)
lab var statfear_index "Statistics anxiety index"
pca statfear_1 statfear_2 statfear_3 statfear_4

//Self efficacy index
alpha selfeffic_1 selfeffic_2 selfeffic_3, item gen(selfeffic_index)
lab var selfeffic_index "Self efficacy index"
pca selfeffic_1 selfeffic_2 selfeffic_3

//Procrastination index
alpha procra_1 procra_2 procra_3 procra_4 procra_5 procra_6 procra_7 procra_8, item gen(procra_index)
lab var procra_index "Procrastination index 1-4"
pca procra_1 procra_2 procra_3 procra_4 procra_5 procra_6 procra_7 procra_8

//Unified satisfaction index PCA
//The predicted component score is shifted by its minimum so that the index starts at 0
pca stf_gen_all stf_orga_all stf_instr_all stf_learn_all
predict stf_pca
sum stf_pca, detail
replace stf_pca = stf_pca - r(min)
label variable stf_pca "Index satisfaction PCA"

//Unified satisfaction index EFA (footnote test for alternative to PCA)
factor stf_gen_all stf_orga_all stf_instr_all stf_learn_all
rotate, promax
predict stf_efa

//Unified satisfaction index CFA (footnote test for alternative to PCA)
//One latent factor measured by the four satisfaction items, estimated with sem
sem(STF_cfa -> stf_gen_all stf_orga_all stf_instr_all stf_learn_all), stand
predict STF_cfa, latent

//Correlation different factor solutions
pwcorr stf_pca stf_efa STF_cfa

//Final Grade
//rev reverses the coding of the grade into grade_all_courses_rev
rev courses_all_grade, gen(grade_all_courses_rev)
fre courses_all_grade

//Migration variables
//Stata sorts missing values above every number, so "migr_parents > 1" on its own
//is also true when migr_parents is missing. The dummy is therefore only defined
//for students with valid information and stays missing otherwise.
gen miggen_second = (migr_parents > 1) if !missing(migr_parents)
label variable miggen_second "Immigrant origin (2nd gen.)"

//immigrant = 1 as soon as one of the two sources indicates immigrant origin,
//0 only if both sources are observed and neither does, missing otherwise
//NOTE(review): relies on migr_self == 0 marking students who immigrated
//themselves - confirm against the codebook
gen immigrant = 0 if !missing(miggen_second, migr_self)
replace immigrant = 1 if miggen_second ==1 | migr_self ==0
label variable immigrant "Immigrant origin"


********************************************************************************
//Creating Regression Sample & last data preparation
********************************************************************************

*Excluding students who participated only in wave 3 ("00X0") or only in wave 4 ("000X")
*(position k of pattern refers to wave k, "X" = participated)
drop if pattern == "000X"
drop if pattern == "00X0"

//Listwise deletion: nonmiss is 1 only for observations without missing values
//on any of the variables used in the regressions
mark nonmiss
markout nonmiss grade_all_courses_rev stf_gen_all stf_orga_all stf_instr_all stf_learn_all master attendance homework_all univqual_parents gradeabitur_rev statfear_index selfeffic_index procra_index male age immigrant
sum grade_all_courses_rev stf_gen_all stf_orga_all stf_instr_all stf_learn_all master attendance homework_all univqual_parents gradeabitur_rev statfear_index selfeffic_index procra_index male age immigrant

drop if nonmiss != 1

//Saving sample for figures
//Only the person identifier is stored; it is merged to the long data for Figure 1.
//Forward slashes are used in all paths because they work on every operating system.
preserve
	keep pid
	save "out/pulse_sample.dta"	, replace
restore

//Standardization of variables - 0/1 for maximized effects
//Min-max rescaling: subtract the sample minimum, then divide by the maximum of
//the shifted variable, so that std2* runs from 0 to 1 within the regression sample
foreach var of varlist statfear_index selfeffic_index procra_index attendance age gradeabitur_rev stf_pca homework_all stf_gen_all stf_orga_all stf_instr_all stf_learn_all {
	sum `var'  
	gen sx`var'=(`var'-r(min))   
	sum sx`var'  
	gen std2`var'=sx`var'/r(max)   
	drop sx`var'
}

//Labeling standardized variables
label variable std2homework_all "Homework done"
label variable std2stf_pca "Unified satisfaction index"
label variable std2statfear_index "Statistics anxiety index"
label variable std2selfeffic_index "Self-efficacy index"
label variable std2procra_index "Procrastination index"
label variable std2attendance "Frequency of attendance"
label variable std2age "Age group"
label variable std2gradeabitur_rev  "Grade Abitur (0-1, 1 = best grade)"
label variable master "Master's programme (ref. bachelor)"
label variable male "Male (ref.: female)"
label variable lect "Lecture (ref.: seminar)"


********************************************************************************
//Creating Figures and Tables
********************************************************************************

**# Figure 1: The Development of Means of Student Satisfaction Items Over Time
********************************************************************************
//The wide regression data are preserved while the long data are loaded and
//restricted to the regression sample; restore brings the wide data back.
preserve
use "pulse_long_replication_data.dta", clear //using long dataset
merge m:1 pid using "out/pulse_sample.dta", keep(3)	//using regression sample (n=89)
	
	//Item means per wave, then the mean over the six items of each dimension
	collapse (mean) stf_gen_? stf_orga_? stf_instr_? stf_learn_?, by(wave)
	
	egen stf_gen_all = rowmean(stf_gen_1 stf_gen_2 stf_gen_3 stf_gen_4 stf_gen_5 stf_gen_6)
	egen stf_orga_all = rowmean(stf_orga_1 stf_orga_2 stf_orga_3 stf_orga_4 stf_orga_5 stf_orga_6)
	egen stf_instr_all = rowmean(stf_instr_1 stf_instr_2 stf_instr_3 stf_instr_4 stf_instr_5 stf_instr_6)
	egen stf_learn_all = rowmean(stf_learn_1 stf_learn_2 stf_learn_3 stf_learn_4 stf_learn_5 stf_learn_6)

	graph twoway line  stf_gen_all stf_orga_all stf_instr_all stf_learn_all wave, scheme(plotplain) ylabel(1(1)7)  ytitle("Satisfaction") xtitle(Wave) ///
		legend(label(1 "Overall course satisfaction") label(2 "Satisfaction with organisation") label(3 "Satisfaction with the instructor") label(4 "Satisfaction with own learning progress") ///
		position(6) row(2)) note("N = 89", span size("small")) name(fig1, replace)
	resize fig1, xsize(14cm) ysize(10cm)		

	//The graph is addressed by name so that every file is written from fig1,
	//regardless of which graph window is currently on top
	graph export "out/fig1_satisfaction_over_time.png", name(fig1) replace
	graph save fig1 "out/fig1_satisfaction_over_time.gph", replace
	graph export "out/fig1_satisfaction_over_time.eps", name(fig1) replace

restore

**#Figure 2: Sankey Diagram of Overall Course Satisfaction 
********************************************************************************

*--> see R-File Sankey


**#Figure 3: Multiple Regression Results with Dependent Variable Unified Satisfaction Index (Regression Sample Size n = 89 ) 
**************************************
//OLS models are estimated block-wise (M1-M3), jointly (M4) and with course
//dummies in place of the master/lect controls (M5). coefplot then draws the
//coefficients of all five models side by side with 90% confidence intervals.

//Controls used in the regressions of this section and of the sections below
global control "master lect"

//M1: study-related variables
reg  stf_pca $control std2homework_all std2attendance  
eststo M1

//M2: student characteristics
reg  stf_pca $control male std2age univqual_parents immigrant std2gradeabitur_rev
eststo M2

//M3: psychological attributes
reg  stf_pca $control std2statfear_index std2selfeffic_index std2procra_index 
eststo M3

//M4: all blocks jointly
reg  stf_pca $control std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index 
eststo M4

//M5: all blocks with course dummies instead of $control
reg  stf_pca  std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index i.course
eststo M5

//The constant and the course dummies are estimated but not plotted
coefplot  M1 || M2 || M3 || M4 || M5, ciopts(lwidth(*0.9)) levels(90) grid(none) drop(_cons *course) xline(0) mlabposition(12) byopts(row(1)) legend(pos(12) size( medium)  row(1)) name(fig3, replace) ///
	headings(master = "{bf:Block 1: Controls}" male = "{bf:Block 3: Student characteristics}" std2statfear_index = "{bf:Block 4: Psychological attributes}" std2homework_all = "{bf:Block 2: Study-related variables}") 

resize fig3, xsize(22cm) ysize(10cm)		
	
//Forward slashes work on every operating system; the graph is addressed by
//name so that all three files are written from fig3
graph export "out/fig3_satisfaction.png", name(fig3) replace
graph export "out/fig3_satisfaction.eps", name(fig3) replace
graph save fig3 "out/fig3_satisfaction.gph", replace	
	

**# Figure 4: Multiple Regression Results Using Grades in the Final Exam as the Outcome (Regression Sample Size n = 89 ) 
**************************************
//Same model sequence as for the satisfaction index, now with the reversed final
//grade as outcome and the rescaled satisfaction index as additional predictor.
//$control is the global holding the control variables (master lect).

//M6: study-related variables
reg  grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance  
eststo M6

//M7: student characteristics
reg  grade_all_courses_rev $control male std2age univqual_parents immigrant std2gradeabitur_rev
eststo M7

//M8: psychological attributes
reg  grade_all_courses_rev $control std2statfear_index std2selfeffic_index std2procra_index 
eststo M8

//M9: all blocks jointly
reg  grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index 
eststo M9

//M10: all blocks with course dummies instead of $control
reg  grade_all_courses_rev std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index i.course
eststo M10

//The constant and the course dummies are estimated but not plotted
coefplot  M6 || M7 || M8 || M9 || M10, ciopts(lwidth(*0.9)) levels(90) grid(none) drop(_cons *course) xline(0) mlabposition(12) byopts(row(1)) legend(pos(12) size( medium)  row(1)) name(fig4, replace) ///
	headings(master = "{bf:Block 1: Controls}" male = "{bf:Block 3: Student characteristics}" std2statfear_index = "{bf:Block 4: Psychological attributes}" std2stf_pca = "{bf:Block 2: Study-related variables}")
resize fig4, xsize(22cm) ysize(10cm)		
	
//Forward slashes work on every operating system; the graph is addressed by
//name so that all three files are written from fig4
graph export "out/fig4_grade.png", name(fig4) replace
graph export "out/fig4_grade.eps", name(fig4) replace
graph save fig4 "out/fig4_grade.gph", replace	


**# Appendix 
********************************************************************************	
	
**# Table A.1 - Sample Composition for Socio-Demographic Variables in all Four Survey Waves.
*****************************
//Works on a fresh copy of the wide data; the regression data in memory are
//preserved and restored afterwards.

preserve
use "pulse_wide_replication_data.dta", clear

*Drop observations that do not appear in wave 1
*(all participation patterns whose first position is "0")
drop if inlist(pattern, "000X", "00XX", "0XXX", "0X0X", "0X00", "0XX0", "00X0")

*Recode variables from the original data set
recode migr_parents (1=1) (else =0), gen(migr_parents_nomig)
recode migr_parents (2=1) (else =0), gen(migr_parents_oneparent)
recode migr_parents (3=1) (else =0), gen(migr_parents_bothparents)
recode male (1=0)(0=1)(.=.), gen(female)

*Tab variables for all wave-1 respondents and for those of them who also took part in wave 2, 3 and 4
*Position k of pattern refers to wave k ("X" = participated). Participation is
*read from that position directly, so that no participation pattern can be
*forgotten (the former hand-written lists lacked "X0X0" for wave 3 and
*"X00X"/"X0XX" for wave 4).
local sociodem male female age edu_m edu_f migr_self migr_parents_nomig migr_parents_oneparent migr_parents_bothparents

tab1 `sociodem' 
tab1 `sociodem' if substr(pattern, 2, 1) == "X"
tab1 `sociodem' if substr(pattern, 3, 1) == "X"
tab1 `sociodem' if substr(pattern, 4, 1) == "X"
restore
	

**# Table A.2 - Survey Participation for Pulse Survey Wave 1 – 4
*****************************	
//Sample Participation 	
//The participation pattern is split into one 0/1 indicator per wave (t1-t4),
//which are then tabulated; the data in memory are preserved and restored.
preserve
use "pulse_wide_replication_data.dta"	, clear

	//participation
	keep pid pattern
	drop if inlist(pattern, "000X", "00X0")

	cap drop t*
	//Character w of pattern belongs to wave w: "X" becomes 1, "0" stays 0
	forvalues w = 1/4 {
		generate t`w' = substr(pattern, `w', 1)
		replace t`w' = "1" if t`w' == "X"
		replace t`w' = "0" if t`w' == "0"
		destring t`w', replace
	}
	drop pattern
	
	//Frequency tables for wave 1 to wave 4
	forvalues w = 1/4 {
		fre t`w'
	}
restore
		
**# Table A.3 - Estimation Results for the Principal Components Analysis Used to Form the Unified Satisfaction Index Based on the First Survey Wave
*****************************
*See coding of Unified Satisfaction Index above.


**# Table A.4 - Original Version and English Translation of Questions and Items in the Pulse survey Questionnaire Used in Our Analyses
*****************************	
*No coding necessary		
		
	
**# Table A.5 - Coding of Variables and Descriptive Distributions Based on the Sample Used for Multivariate Analyses (n = 89)
*****************************	
//Dummies for every category of age and attendance are created so that their
//shares appear in the table of descriptives written by asdoc.
	
//Remove dummies left over from an earlier run. For attendance only names with a
//single character after the underscore are dropped, so that the wave variables
//attendance_w2 - attendance_w4 are not deleted along the way.
cap drop age_* 	
cap drop attendance_?
tab age, gen(age_)
tab attendance, gen(attendance_)
//The file name carries the table number (A5), in line with the other appendix tables
//NOTE(review): the path inside save() keeps the backslash because asdoc parses
//this option itself - confirm before switching to a forward slash
asdoc tabstat stf_gen_all stf_orga_all stf_instr_all stf_learn_all stf_pca grade_all_courses_rev master lect male age  age_* univqual_parents immigrant gradeabitur_rev statfear_index selfeffic_index procra_index homework_all attendance attendance_1 attendance_2 attendance_3 attendance_4, statistics(mean sd min max N)  fs(8) dec(2) save(out\appendix_tabA5_sample) replace label

//Number of observations in the regression sample
count
	

**# Table A.6 - Coefficient Estimates with Bootstrapped Standard Errors with Dependent Variable Unified Satisfaction Index
*****************************	
//Same model sequence as for the satisfaction figure, with bootstrapped standard
//errors (1000 replications, fixed seed) and 90% confidence level.
//$control is the global holding the control variables (master lect).
reg stf_pca $control std2homework_all std2attendance, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R1

reg stf_pca $control male std2age univqual_parents immigrant std2gradeabitur_rev, level(90)  vce(bootstrap, reps(1000) seed(335566))
eststo R2

reg stf_pca $control std2statfear_index std2selfeffic_index std2procra_index, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R3

reg stf_pca $control std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R4

reg stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index  i.course, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R5		
		
//stats() takes the names of e() results: regress stores the adjusted R-squared
//as e(r2_a), an e(ar2) does not exist and would leave the row empty.
//labels() keeps the row names N, r2, ar2 and bic in the table.
esttab R1 R2 R3 R4 R5 using "out/appendix_tabA6_bootstrapping_satisfaction.rtf", star(+ 0.1 * 0.05 ** 0.01 *** 0.001) b(3) se(3) replace label ///
	stats(N r2 r2_a bic, labels("N" "r2" "ar2" "bic")) nogap 		
	

**# Table A.7: Coefficient Estimates with Bootstrapped Standard Errors with Dependent Variable Final Grade (1 – 5, best grade = 5)
*****************************			
//Same model sequence as for the grade figure, with bootstrapped standard errors
//(1000 replications, fixed seed) and 90% confidence level.
//$control is the global holding the control variables (master lect).
reg grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R6

reg grade_all_courses_rev $control male std2age univqual_parents immigrant std2gradeabitur_rev, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R7

reg grade_all_courses_rev $control std2statfear_index std2selfeffic_index std2procra_index, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R8

reg grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R9

reg grade_all_courses_rev std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index i.course, level(90) vce(bootstrap, reps(1000) seed(335566))
eststo R10

//stats() takes the names of e() results: regress stores the adjusted R-squared
//as e(r2_a), an e(ar2) does not exist and would leave the row empty.
//labels() keeps the row names N, r2, ar2 and bic in the table.
esttab R6 R7 R8 R9 R10 using "out/appendix_tabA7_bootstrapping_grade.rtf", star(+ 0.1 * 0.05 ** 0.01 *** 0.001) b(3) se(3) replace label ///
	stats(N r2 r2_a bic, labels("N" "r2" "ar2" "bic")) nogap 			

		
**# Table A.8: OLS Coefficient Estimates with Dependent Variable Unified Satisfaction Index
*****************************	
//The OLS models for the satisfaction index are re-estimated, stored under
//M1-M5 and written to a table; regcheck prints regression diagnostics.
//Each model is stored BEFORE regcheck runs, so that whatever the diagnostics
//do to the active estimation results cannot affect what ends up in the table.
//$control is the global holding the control variables (master lect).
reg stf_pca $control std2homework_all std2attendance  
eststo M1
regcheck

reg stf_pca $control male std2age univqual_parents immigrant std2gradeabitur_rev
eststo M2
regcheck

reg stf_pca $control std2statfear_index std2selfeffic_index std2procra_index 
eststo M3
regcheck

reg stf_pca $control std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index 
eststo M4

reg stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index i.course
eststo M5		
regcheck
		
//stats() takes the names of e() results: regress stores the adjusted R-squared
//as e(r2_a), an e(ar2) does not exist and would leave the row empty.
//labels() keeps the row names N, r2, ar2 and bic in the table.
esttab M1 M2 M3 M4 M5 using "out/appendix_tabA8_satisfaction.rtf", star(+ 0.1 * 0.05 ** 0.01 *** 0.001) b(3) se(3) replace label ///
	stats(N r2 r2_a bic, labels("N" "r2" "ar2" "bic")) nogap 	

	
**# Table A.9: OLS Coefficient Estimates with Dependent Variable Final Grade (1 – 5, best grade = 5)
*****************************		
//The OLS models for the final grade are re-estimated, stored under M6-M10 and
//written to a table; regcheck prints regression diagnostics.
//Each model is stored BEFORE regcheck runs, so that whatever the diagnostics
//do to the active estimation results cannot affect what ends up in the table.
//$control is the global holding the control variables (master lect).
reg grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance  
eststo M6
regcheck

reg grade_all_courses_rev $control male std2age univqual_parents immigrant std2gradeabitur_rev
eststo M7
regcheck

reg grade_all_courses_rev $control std2statfear_index std2selfeffic_index std2procra_index 
eststo M8
regcheck

reg grade_all_courses_rev $control std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index 
eststo M9

reg grade_all_courses_rev std2stf_pca std2homework_all std2attendance male std2age univqual_parents immigrant std2gradeabitur_rev std2statfear_index std2selfeffic_index std2procra_index i.course
eststo M10	
regcheck
	
//stats() takes the names of e() results: regress stores the adjusted R-squared
//as e(r2_a), an e(ar2) does not exist and would leave the row empty.
//labels() keeps the row names N, r2, ar2 and bic in the table.
esttab M6 M7 M8 M9 M10 using "out/appendix_tabA9_grade.rtf", star(+ 0.1  * 0.05 ** 0.01 *** 0.001) b(3) se(3) replace label ///
	stats(N r2 r2_a bic, labels("N" "r2" "ar2" "bic")) nogap 	
	

**# Figure A.1: Sankey Diagram of Survey Participation Over the Four Survey Waves
*****************************			
	
*--> see R-File Sankey